From 3bd36ccc5027936810134b3b0ddbf7aebdf73d58 Mon Sep 17 00:00:00 2001 From: "rneugeba@wyvis.research.intel-research.net" Date: Wed, 2 Mar 2005 17:18:39 +0000 Subject: [PATCH] bitkeeper revision 1.1242 (4225f56fwo6ym-RMTBheAeYhl10ATQ) forward ported James Bulpin's performance counters tool Signed-off-by: michael.fetterman@cl.cam.ac.uk --- .rootkeys | 8 + tools/libxc/plan9a.out.h | 0 tools/libxc/xc.h | 5 + tools/libxc/xc_misc.c | 33 ++ tools/libxc/xc_plan9_build.c | 0 tools/misc/Makefile | 3 + tools/misc/cpuperf/Makefile | 51 ++ tools/misc/cpuperf/README.txt | 371 +++++++++++++ tools/misc/cpuperf/cpuperf.c | 301 +++++++++++ tools/misc/cpuperf/cpuperf_perfcntr.h | 41 ++ tools/misc/cpuperf/cpuperf_xeno.h | 38 ++ tools/misc/cpuperf/module/Makefile | 16 + tools/misc/cpuperf/module/perfcntr.c | 730 ++++++++++++++++++++++++++ tools/misc/cpuperf/p4perf.h | 382 ++++++++++++++ 14 files changed, 1979 insertions(+) mode change 100755 => 100644 tools/libxc/plan9a.out.h mode change 100755 => 100644 tools/libxc/xc_plan9_build.c create mode 100644 tools/misc/cpuperf/Makefile create mode 100644 tools/misc/cpuperf/README.txt create mode 100644 tools/misc/cpuperf/cpuperf.c create mode 100644 tools/misc/cpuperf/cpuperf_perfcntr.h create mode 100644 tools/misc/cpuperf/cpuperf_xeno.h create mode 100644 tools/misc/cpuperf/module/Makefile create mode 100644 tools/misc/cpuperf/module/perfcntr.c create mode 100644 tools/misc/cpuperf/p4perf.h diff --git a/.rootkeys b/.rootkeys index 3a0d744343..b7b65f0953 100644 --- a/.rootkeys +++ b/.rootkeys @@ -586,6 +586,14 @@ 41a216cayFe2FQroFuzvNPw1AvNiqQ tools/libxutil/util.c 41a216ca7mgVSnCBHPCLkGOIqPS1CQ tools/libxutil/util.h 3f776bd2Xd-dUcPKlPN2vG89VGtfvQ tools/misc/Makefile +4225f56d7sa9aEARfjNeCVTMYDAmZA tools/misc/cpuperf/Makefile +4225f56dS5TGdKojmuBnrV3PzbE6Rg tools/misc/cpuperf/README.txt +4225f56dcodvBSPoWYS6kvwZCQhgzg tools/misc/cpuperf/cpuperf.c +4225f56dMjZK14EWd8K0gq4v5Diwjg tools/misc/cpuperf/cpuperf_perfcntr.h 
+4225f56d_XjSY1297IiH96qeqD4sCA tools/misc/cpuperf/cpuperf_xeno.h +4225f56dqlGC_UZ681F95mCgLbOeHQ tools/misc/cpuperf/module/Makefile +4225f56dnmms-VFr1MiDVG_dYoM7IQ tools/misc/cpuperf/module/perfcntr.c +4225f56dYhIGQRD_kKVJ6xQrkqO0YQ tools/misc/cpuperf/p4perf.h 40ab2cfawIw8tsYo0dQKtp83h4qfTQ tools/misc/fakei386xen 3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile 3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README diff --git a/tools/libxc/plan9a.out.h b/tools/libxc/plan9a.out.h old mode 100755 new mode 100644 diff --git a/tools/libxc/xc.h b/tools/libxc/xc.h index 81d7b2ed3f..926a0a2033 100644 --- a/tools/libxc/xc.h +++ b/tools/libxc/xc.h @@ -370,6 +370,11 @@ int xc_perfc_control(int xc_handle, u32 op, xc_perfc_desc_t *desc); +/* read/write msr */ +long long xc_msr_read(int xc_handle, int cpu_mask, int msr); +int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low, + unsigned int high); + /** * Memory maps a range within one domain to a local address range. Mappings * should be unmapped with munmap and should follow the same rules as mmap diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c index 0efd1f1972..9ce8548709 100644 --- a/tools/libxc/xc_misc.c +++ b/tools/libxc/xc_misc.c @@ -97,3 +97,36 @@ int xc_perfc_control(int xc_handle, return (rc == 0) ? 
dop.u.perfccontrol.nr_counters : rc; } +/* Read MSR 'msr' by issuing a DOM0_MSR op (write = 0) with the given cpu_mask. Returns (out2 << 32) | out1 on success, or the non-zero do_dom0_op() status on failure. */ +long long xc_msr_read(int xc_handle, int cpu_mask, int msr) +{ + int rc; + dom0_op_t op; + + op.cmd = DOM0_MSR; + op.u.msr.write = 0; + op.u.msr.msr = msr; + op.u.msr.cpu_mask = cpu_mask; + + rc = do_dom0_op(xc_handle, &op); + /* op is never pre-initialised here, so out1/out2 must not be read when the op failed */ + return (rc != 0) ? rc : (long long)((((unsigned long long)op.u.msr.out2)<<32) | op.u.msr.out1); +} +/* Write the 64-bit value high:low (in2:in1) to MSR 'msr' by issuing a DOM0_MSR op (write = 1) with the given cpu_mask. Returns the do_dom0_op() status. */ +int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low, + unsigned int high) +{ + int rc; + dom0_op_t op; + + op.cmd = DOM0_MSR; + op.u.msr.write = 1; + op.u.msr.msr = msr; + op.u.msr.cpu_mask = cpu_mask; + op.u.msr.in1 = low; + op.u.msr.in2 = high; + + rc = do_dom0_op(xc_handle, &op); + + return rc; +} diff --git a/tools/libxc/xc_plan9_build.c b/tools/libxc/xc_plan9_build.c old mode 100755 new mode 100644 diff --git a/tools/misc/Makefile b/tools/misc/Makefile index 39bfc424e6..39b38c25c1 100644 --- a/tools/misc/Makefile +++ b/tools/misc/Makefile @@ -21,18 +21,21 @@ INSTALL_SBIN = netfix xm xend xensv xenperf all: $(TARGETS) $(MAKE) -C miniterm + $(MAKE) -C cpuperf install: all [ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin [ -d $(DESTDIR)/usr/sbin ] || $(INSTALL_DIR) $(DESTDIR)/usr/sbin $(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin $(INSTALL_PROG) $(INSTALL_SBIN) $(DESTDIR)/usr/sbin + $(MAKE) -C cpuperf install # No sense in installing miniterm on the Xen box.
# $(MAKE) -C miniterm install clean: $(RM) *.o $(TARGETS) *~ $(MAKE) -C miniterm clean + $(MAKE) -C cpuperf clean %.o: %.c $(HDRS) Makefile $(CC) -c $(CFLAGS) -o $@ $< diff --git a/tools/misc/cpuperf/Makefile b/tools/misc/cpuperf/Makefile new file mode 100644 index 0000000000..92b334fe7a --- /dev/null +++ b/tools/misc/cpuperf/Makefile @@ -0,0 +1,51 @@ +# +# Make Performance counter tool +# +# $Id: Makefile,v 1.1 2003/10/13 16:49:44 jrb44 Exp $ +# +# $Log: Makefile,v $ +# Revision 1.1 2003/10/13 16:49:44 jrb44 +# Initial revision +# +# + +INSTALL = install +INSTALL_PROG = $(INSTALL) -m0755 +INSTALL_DIR = $(INSTALL) -d -m0755 + +# these are for Xen +XEN_ROOT=../../.. +include $(XEN_ROOT)/tools/Rules.mk + +CC = gcc +CFLAGS = -Wall -O3 + +HDRS = $(wildcard *.h) +SRCS = $(wildcard *.c) +OBJS = $(patsubst %.c,%.o,$(SRCS)) + +TARGETS = cpuperf-xen cpuperf-perfcntr + +INSTALL_BIN = $(TARGETS) + + +all: $(TARGETS) + +clean: + $(RM) *.o $(TARGETS) + +%: %.c $(HDRS) Makefile + $(CC) $(CFLAGS) -o $@ $< + +cpuperf-xen: cpuperf.c $(HDRS) Makefile + $(CC) $(CFLAGS) -I $(XEN_LIBXC) -L$(XEN_LIBXC) -lxc -L$(XEN_LIBXUTIL) -lxutil -DXENO -o $@ $< + +cpuperf-perfcntr: cpuperf.c $(HDRS) Makefile + $(CC) $(CFLAGS) -DPERFCNTR -o $@ $< + +install: all + $(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin + + +# End of $RCSfile: Makefile,v $ + diff --git a/tools/misc/cpuperf/README.txt b/tools/misc/cpuperf/README.txt new file mode 100644 index 0000000000..96fdec1e28 --- /dev/null +++ b/tools/misc/cpuperf/README.txt @@ -0,0 +1,371 @@ +Usage +===== + +Use either cpuperf-xen or cpuperf-perfcntr as appropriate to the system +in use.
+ +To write: + + cpuperf -E ESCR -C CCCR + + optional: all numbers in base 10 unless specified + + -d Debug mode + -c CPU number + -t ESCR thread bits - default is 12 (Thread 0 all rings) + bit 0: Thread 1 in rings 1,2,3 + bit 1: Thread 1 in ring 0 + bit 2: Thread 0 in rings 1,2,3 + bit 3: Thread 0 in ring 0 + -e Event selection number + -m Event mask bits + -T ESCR tag value + -k Sets CCCR 'compare' bit + -n Sets CCCR 'complement' bit + -g Sets CCCR 'edge' bit + -P Set the specified bit in MSR_P4_PEBS_ENABLE + -V Set the specified bit in MSR_P4_PEBS_MATRIX_VERT + (-V and -P may be used multiple times to set multiple bits.) + +To read: + + cpuperf -r + + optional: all numbers in base 10 unless specified + + -c CPU number + +CCCR values: + + BPU_CCCR0 + BPU_CCCR1 + BPU_CCCR2 + BPU_CCCR3 + MS_CCCR0 + MS_CCCR1 + MS_CCCR2 + MS_CCCR3 + FLAME_CCCR0 + FLAME_CCCR1 + FLAME_CCCR2 + FLAME_CCCR3 + IQ_CCCR0 + IQ_CCCR1 + IQ_CCCR2 + IQ_CCCR3 + IQ_CCCR4 + IQ_CCCR5 + NONE - do not program any CCCR, used when setting up an ESCR for tagging + +ESCR values: + + BSU_ESCR0 + BSU_ESCR1 + FSB_ESCR0 + FSB_ESCR1 + MOB_ESCR0 + MOB_ESCR1 + PMH_ESCR0 + PMH_ESCR1 + BPU_ESCR0 + BPU_ESCR1 + IS_ESCR0 + IS_ESCR1 + ITLB_ESCR0 + ITLB_ESCR1 + IX_ESCR0 + IX_ESCR1 + MS_ESCR0 + MS_ESCR1 + TBPU_ESCR0 + TBPU_ESCR1 + TC_ESCR0 + TC_ESCR1 + FIRM_ESCR0 + FIRM_ESCR1 + FLAME_ESCR0 + FLAME_ESCR1 + DAC_ESCR0 + DAC_ESCR1 + SAAT_ESCR0 + SAAT_ESCR1 + U2L_ESCR0 + U2L_ESCR1 + CRU_ESCR0 + CRU_ESCR1 + CRU_ESCR2 + CRU_ESCR3 + CRU_ESCR4 + CRU_ESCR5 + IQ_ESCR0 + IQ_ESCR1 + RAT_ESCR0 + RAT_ESCR1 + SSU_ESCR0 + SSU_ESCR1 + ALF_ESCR0 + ALF_ESCR1 + + +Example configurations +====================== + +Note that in most cases there is a choice of ESCRs and CCCRs for +each metric although not all combinations are allowed. Each ESCR and +counter/CCCR can be used only once.
+ +Mispredicted branches retired +============================= + +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 3 -m 1 +cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 3 -m 1 +cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 3 -m 1 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1 +cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 3 -m 1 +cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 3 -m 1 + +Tracecache misses +================= + +cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1 +cpuperf -E BPU_ESCR0 -C BPU_CCCR1 -e 3 -m 1 +cpuperf -E BPU_ESCR1 -C BPU_CCCR2 -e 3 -m 1 +cpuperf -E BPU_ESCR1 -C BPU_CCCR3 -e 3 -m 1 + +I-TLB +===== + +cpuperf -E ITLB_ESCR0 -C BPU_CCCR0 -e 24 +cpuperf -E ITLB_ESCR0 -C BPU_CCCR1 -e 24 +cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24 +cpuperf -E ITLB_ESCR1 -C BPU_CCCR3 -e 24 + + -m : bit 0 count HITS, bit 1 MISSES, bit 2 uncacheable hit + + e.g. all ITLB misses -m 2 + +Load replays +============ + +cpuperf -E MOB_ESCR0 -C BPU_CCCR0 -e 3 +cpuperf -E MOB_ESCR0 -C BPU_CCCR1 -e 3 +cpuperf -E MOB_ESCR1 -C BPU_CCCR2 -e 3 +cpuperf -E MOB_ESCR1 -C BPU_CCCR3 -e 3 + + -m : bit mask, replay due to... 
+ 1: unknown store address + 3: unknown store data + 4: partially overlapped data access between LD/ST + 5: unaligned address between LD/ST + +Page walks +========== + +cpuperf -E PMH_ESCR0 -C BPU_CCCR0 -e 1 +cpuperf -E PMH_ESCR0 -C BPU_CCCR1 -e 1 +cpuperf -E PMH_ESCR1 -C BPU_CCCR2 -e 1 +cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1 + + -m : bit 0 counts walks for a D-TLB miss, bit 1 for I-TLB miss + +L2/L3 cache accesses +==================== + +cpuperf -E BSU_ESCR0 -C BPU_CCCR0 -e 12 +cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12 +cpuperf -E BSU_ESCR1 -C BPU_CCCR2 -e 12 +cpuperf -E BSU_ESCR1 -C BPU_CCCR3 -e 12 + + -m : where the bit mask is: + 0: Read L2 HITS Shared + 1: Read L2 HITS Exclusive + 2: Read L2 HITS Modified + 3: Read L3 HITS Shared + 4: Read L3 HITS Exclusive + 5: Read L3 HITS Modified + 8: Read L2 MISS + 9: Read L3 MISS + 10: Write L2 MISS + +Front side bus activity +======================= + +cpuperf -E FSB_ESCR0 -C BPU_CCCR0 -e 23 -k -g +cpuperf -E FSB_ESCR0 -C BPU_CCCR1 -e 23 -k -g +cpuperf -E FSB_ESCR1 -C BPU_CCCR2 -e 23 -k -g +cpuperf -E FSB_ESCR1 -C BPU_CCCR3 -e 23 -k -g + + -m : where the bit mask is for bus events: + 0: DRDY_DRV Processor drives bus + 1: DRDY_OWN Processor reads bus + 2: DRDY_OTHER Data on bus not being sampled by processor + 3: DBSY_DRV Processor reserves bus for driving + 4: DBSY_OWN Other entity reserves bus for sending to processor + 5: DBSY_OTHER Other entity reserves bus for sending elsewhere + + e.g. -m 3 to get cycles bus actually in use. 
+ +Pipeline clear (entire) +======================= + +cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 2 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 2 +cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 2 +cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 2 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 2 +cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 2 + + -m : bit mask: + 0: counts a portion of cycles while clear (use -g for edge trigger) + 1: counts each time machine clears for memory ordering issues + 2: counts each time machine clears for self modifying code + +Instructions retired +==================== + +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 +cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 2 +cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 2 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2 +cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 2 +cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 2 + + -m : bit mask: + 0: counts non-bogus, not tagged instructions + 1: counts non-bogus, tagged instructions + 2: counts bogus, not tagged instructions + 3: counts bogus, tagged instructions + + e.g. -m 3 to count legit retirements + +Uops retired +============ + +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 1 +cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 1 +cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 1 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 1 +cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 1 +cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 1 + + -m : bit mask: + 0: Non-bogus + 1: Bogus + +x87 FP uops +=========== + +cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768 +cpuperf -E FIRM_ESCR0 -C FLAME_CCCR1 -e 4 -m 32768 +cpuperf -E FIRM_ESCR1 -C FLAME_CCCR2 -e 4 -m 32768 +cpuperf -E FIRM_ESCR1 -C FLAME_CCCR3 -e 4 -m 32768 + +Replay tagging mechanism +======================== + +Counts retirement of uops tagged with the replay tagging mechanism + +cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9 +cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 9 +cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 9 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 9 +cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 9 + + -m : bit mask: + 0: Non-bogus (set this bit for all events listed 
below) + 1: Bogus + +Set replay tagging mechanism bits with -P and -V: + + L1 cache load miss retired: -P 0 -P 24 -P 25 -V 0 + L2 cache load miss retired: -P 1 -P 24 -P 25 -V 0 (read manual) + DTLB load miss retired: -P 2 -P 24 -P 25 -V 0 + DTLB store miss retired: -P 2 -P 24 -P 25 -V 1 + DTLB all miss retired: -P 2 -P 24 -P 25 -V 0 -V 1 + +e.g. to count all DTLB misses + + cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9 -m 1 -P 2 -P 24 -P 25 -V 0 -V 1 + +Front end event +=============== + +To count tagged uops: + +cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 8 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 8 +cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 8 +cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 8 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8 +cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 8 + + -m : bit 0 for non-bogus uops, bit 1 for bogus uops + +Must have another ESCR programmed to tag uops as required + +cpuperf -E RAT_ESCR0 -C NONE -e 2 +cpuperf -E RAT_ESCR1 -C NONE -e 2 + + -m : bit 1 for LOADs, bit 2 for STOREs + +An example set of counters +=========================== + +# instructions retired +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3 + +# trace cache misses +cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1 + +# L1 D cache misses (load misses retired) +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9 -m 1 -P 0 -P 24 -P 25 -V 0 + +# L2 misses (load and store) +cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12 -m 1280 + +# I-TLB misses +cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24 -m 2 + +# D-TLB misses (as PT walks) +cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1 -m 1 + +# Other 'bonus' counters would be: +# number of loads executed - need both command lines +cpuperf -E RAT_ESCR0 -C NONE -e 2 -m 2 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8 -m 3 + +# number of mispredicted branches +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1 + +# x87 FP uOps +cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768 + +The above has counter assignments + +0 Trace cache misses +1 L2 Misses +2 I-TLB misses +3 D-TLB misses +4 +5 +6 +7 +8 x87 FP uOps +9 +10 +11 +12 Instructions
retired +13 L1 D cache misses +14 Mispredicted branches +15 Loads executed +16 +17 + +Counting instructions retired on each logical CPU +================================================= + +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3 -t 12 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2 -m 3 -t 3 + +Cannot count mispred branches as well due to CRU_ESCR1 use. diff --git a/tools/misc/cpuperf/cpuperf.c b/tools/misc/cpuperf/cpuperf.c new file mode 100644 index 0000000000..4e4dbb0665 --- /dev/null +++ b/tools/misc/cpuperf/cpuperf.c @@ -0,0 +1,301 @@ +/* + * User mode program to program performance counters. + * + * JRB/IAP October 2003. + * + * $Id: cpuperf.c,v 1.2 2003/10/14 11:00:59 jrb44 Exp $ + * + * $Log: cpuperf.c,v $ + * Revision 1.2 2003/10/14 11:00:59 jrb44 + * Added dcefault CPU. Added NONE CCCR. + * + * Revision 1.1 2003/10/13 16:49:44 jrb44 + * Initial revision + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "p4perf.h" + +static inline void cpus_wrmsr(int cpu_mask, + int msr, + unsigned int low, + unsigned int high ) +{ + fprintf(stderr, "No backend to write MSR 0x%x <= 0x%08x%08x on %08x\n", + msr, high, low, cpu_mask); +} + +static inline unsigned long long cpus_rdmsr( int cpu_mask, int msr ) +{ + fprintf(stderr, "No backend to read MSR 0x%x on %08x\n", msr, cpu_mask); + return 0; +} + +#ifdef PERFCNTR +#include "cpuperf_perfcntr.h" +#define cpus_wrmsr perfcntr_wrmsr +#define cpus_rdmsr perfcntr_rdmsr +#endif + +#ifdef XENO +#include "cpuperf_xeno.h" +#define cpus_wrmsr dom0_wrmsr +#define cpus_rdmsr dom0_rdmsr +#endif + +struct macros { + char *name; + unsigned long msr_addr; + int number; +}; + +#define NO_CCCR 0xfffffffe + +struct macros msr[] = { + {"BPU_COUNTER0", 0x300, 0}, + {"BPU_COUNTER1", 0x301, 1}, + {"BPU_COUNTER2", 0x302, 2}, + {"BPU_COUNTER3", 0x303, 3}, + {"MS_COUNTER0", 0x304, 4}, + {"MS_COUNTER1", 0x305, 5}, + {"MS_COUNTER2", 0x306, 6}, + {"MS_COUNTER3", 0x307, 7}, + {"FLAME_COUNTER0", 0x308, 
8}, + {"FLAME_COUNTER1", 0x309, 9}, + {"FLAME_COUNTER2", 0x30a, 10}, + {"FLAME_COUNTER3", 0x30b, 11}, + {"IQ_COUNTER0", 0x30c, 12}, + {"IQ_COUNTER1", 0x30d, 13}, + {"IQ_COUNTER2", 0x30e, 14}, + {"IQ_COUNTER3", 0x30f, 15}, + {"IQ_COUNTER4", 0x310, 16}, + {"IQ_COUNTER5", 0x311, 17}, + {"BPU_CCCR0", 0x360, 0}, + {"BPU_CCCR1", 0x361, 1}, + {"BPU_CCCR2", 0x362, 2}, + {"BPU_CCCR3", 0x363, 3}, + {"MS_CCCR0", 0x364, 4}, + {"MS_CCCR1", 0x365, 5}, + {"MS_CCCR2", 0x366, 6}, + {"MS_CCCR3", 0x367, 7}, + {"FLAME_CCCR0", 0x368, 8}, + {"FLAME_CCCR1", 0x369, 9}, + {"FLAME_CCCR2", 0x36a, 10}, + {"FLAME_CCCR3", 0x36b, 11}, + {"IQ_CCCR0", 0x36c, 12}, + {"IQ_CCCR1", 0x36d, 13}, + {"IQ_CCCR2", 0x36e, 14}, + {"IQ_CCCR3", 0x36f, 15}, + {"IQ_CCCR4", 0x370, 16}, + {"IQ_CCCR5", 0x371, 17}, + {"BSU_ESCR0", 0x3a0, 7}, + {"BSU_ESCR1", 0x3a1, 7}, + {"FSB_ESCR0", 0x3a2, 6}, + {"FSB_ESCR1", 0x3a3, 6}, + {"MOB_ESCR0", 0x3aa, 2}, + {"MOB_ESCR1", 0x3ab, 2}, + {"PMH_ESCR0", 0x3ac, 4}, + {"PMH_ESCR1", 0x3ad, 4}, + {"BPU_ESCR0", 0x3b2, 0}, + {"BPU_ESCR1", 0x3b3, 0}, + {"IS_ESCR0", 0x3b4, 1}, + {"IS_ESCR1", 0x3b5, 1}, + {"ITLB_ESCR0", 0x3b6, 3}, + {"ITLB_ESCR1", 0x3b7, 3}, + {"IX_ESCR0", 0x3c8, 5}, + {"IX_ESCR1", 0x3c9, 5}, + {"MS_ESCR0", 0x3c0, 0}, + {"MS_ESCR1", 0x3c1, 0}, + {"TBPU_ESCR0", 0x3c2, 2}, + {"TBPU_ESCR1", 0x3c3, 2}, + {"TC_ESCR0", 0x3c4, 1}, + {"TC_ESCR1", 0x3c5, 1}, + {"FIRM_ESCR0", 0x3a4, 1}, + {"FIRM_ESCR1", 0x3a5, 1}, + {"FLAME_ESCR0", 0x3a6, 0}, + {"FLAME_ESCR1", 0x3a7, 0}, + {"DAC_ESCR0", 0x3a8, 5}, + {"DAC_ESCR1", 0x3a9, 5}, + {"SAAT_ESCR0", 0x3ae, 2}, + {"SAAT_ESCR1", 0x3af, 2}, + {"U2L_ESCR0", 0x3b0, 3}, + {"U2L_ESCR1", 0x3b1, 3}, + {"CRU_ESCR0", 0x3b8, 4}, + {"CRU_ESCR1", 0x3b9, 4}, + {"CRU_ESCR2", 0x3cc, 5}, + {"CRU_ESCR3", 0x3cd, 5}, + {"CRU_ESCR4", 0x3e0, 6}, + {"CRU_ESCR5", 0x3e1, 6}, + {"IQ_ESCR0", 0x3ba, 0}, + {"IQ_ESCR1", 0x3bb, 0}, + {"RAT_ESCR0", 0x3bc, 2}, + {"RAT_ESCR1", 0x3bd, 2}, + {"SSU_ESCR0", 0x3be, 3}, + {"SSU_ESCR1", 0x3bf, 3}, + {"ALF_ESCR0", 0x3ca, 1}, + 
{"ALF_ESCR1", 0x3cb, 1}, + {"PEBS_ENABLE", 0x3f1, 0}, + {"PEBS_MATRIX_VERT", 0x3f2, 0}, + {"NONE", NO_CCCR, 0}, + {NULL, 0, 0} +}; + +struct macros *lookup_macro(char *str) +{ + struct macros *m; + + m = msr; + while (m->name) { + if (strcmp(m->name, str) == 0) + return m; + m++; + } + return NULL; +} + +int main(int argc, char **argv) +{ + int c, t = 0xc, es = 0, em = 0, tv = 0, te = 0; + unsigned int cpu_mask = 1; + struct macros *escr = NULL, *cccr = NULL; + unsigned long escr_val, cccr_val; + int debug = 0; + unsigned long pebs = 0, pebs_vert = 0; + int pebs_x = 0, pebs_vert_x = 0; + int read = 0; + int compare = 0; + int complement = 0; + int edge = 0; + +#ifdef XENO + xen_init(); +#endif + + + while ((c = getopt(argc, argv, "dc:t:e:m:T:E:C:P:V:rkng")) != -1) { + switch((char)c) { + case 'P': + pebs |= 1 << atoi(optarg); + pebs_x = 1; + break; + case 'V': + pebs_vert |= 1 << atoi(optarg); + pebs_vert_x = 1; + break; + case 'd': + debug = 1; + break; + case 'c': + { + int cpu = atoi(optarg); + cpu_mask = (cpu == -1)?(~0):(1<>=1; + } + exit(1); + } + + if (!escr) { + fprintf(stderr, "Need an ESCR.\n"); + exit(1); + } + if (!cccr) { + fprintf(stderr, "Need a counter number.\n"); + exit(1); + } + + escr_val = P4_ESCR_THREADS(t) | P4_ESCR_EVNTSEL(es) | + P4_ESCR_EVNTMASK(em) | P4_ESCR_TV(tv) | ((te)?P4_ESCR_TE:0); + cccr_val = P4_CCCR_ENABLE | P4_CCCR_ESCR(escr->number) | + ((compare)?P4_CCCR_COMPARE:0) | + ((complement)?P4_CCCR_COMPLEMENT:0) | + ((edge)?P4_CCCR_EDGE:0) | + P4_CCCR_ACTIVE_THREAD(3)/*reserved*/; + + if (debug) { + fprintf(stderr, "ESCR 0x%lx <= 0x%08lx\n", escr->msr_addr, escr_val); + if (cccr->msr_addr != NO_CCCR) + fprintf(stderr, "CCCR 0x%lx <= 0x%08lx (%u)\n", + cccr->msr_addr, cccr_val, cccr->number); + if (pebs_x) + fprintf(stderr, "PEBS 0x%x <= 0x%08lx\n", + MSR_P4_PEBS_ENABLE, pebs); + if (pebs_vert_x) + fprintf(stderr, "PMV 0x%x <= 0x%08lx\n", + MSR_P4_PEBS_MATRIX_VERT, pebs_vert); + } + + cpus_wrmsr( cpu_mask, escr->msr_addr, escr_val, 0 
); + if (cccr->msr_addr != NO_CCCR) + cpus_wrmsr( cpu_mask, cccr->msr_addr, cccr_val, 0 ); + + if (pebs_x) + cpus_wrmsr( cpu_mask, MSR_P4_PEBS_ENABLE, pebs, 0 ); + + if (pebs_vert_x) + cpus_wrmsr( cpu_mask, MSR_P4_PEBS_MATRIX_VERT, pebs_vert, 0 ); + + return 0; +} + +// End of $RCSfile: cpuperf.c,v $ + diff --git a/tools/misc/cpuperf/cpuperf_perfcntr.h b/tools/misc/cpuperf/cpuperf_perfcntr.h new file mode 100644 index 0000000000..a75a4fc0f7 --- /dev/null +++ b/tools/misc/cpuperf/cpuperf_perfcntr.h @@ -0,0 +1,41 @@ +/* + * Interface to JRB44's /proc/perfcntr interface. + * + * $Id: cpuperf_perfcntr.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $ + * + * $Log: cpuperf_perfcntr.h,v $ + * Revision 1.1 2003/10/13 16:49:44 jrb44 + * Initial revision + * + */ + +#define PROC_PERFCNTR "/proc/perfcntr" + +static inline void perfcntr_wrmsr(int cpu_mask, + int msr, + unsigned int low, + unsigned int high ) +{ + FILE *fd; + unsigned long long value = low | (((unsigned long long)high) << 32); + + fd = fopen(PROC_PERFCNTR, "w"); + if (fd == NULL) + { + perror("open " PROC_PERFCNTR); + exit(1); + } + + fprintf(fd, "%x %x %llx \n", cpu_mask, msr, value); + fprintf(stderr, "%x %x %llx \n", cpu_mask, msr, value); + fclose(fd); +} + +static inline unsigned long long perfcntr_rdmsr( int cpu_mask, int msr ) +{ + fprintf(stderr, "WARNING: rdmsr not yet implemented for perfcntr.\n"); + return 0; +} + +// End of $RCSfile: cpuperf_perfcntr.h,v $ + diff --git a/tools/misc/cpuperf/cpuperf_xeno.h b/tools/misc/cpuperf/cpuperf_xeno.h new file mode 100644 index 0000000000..4f7da770e1 --- /dev/null +++ b/tools/misc/cpuperf/cpuperf_xeno.h @@ -0,0 +1,38 @@ +/* + * Interface to Xen MSR hypercalls. 
+ * + * $Id: cpuperf_xeno.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $ + * + * $Log: cpuperf_xeno.h,v $ + * Revision 1.1 2003/10/13 16:49:44 jrb44 + * Initial revision + * + */ + +#include + +static int xc_handle; + +void xen_init() +{ + if ( (xc_handle = xc_interface_open()) == -1 ) + { + fprintf(stderr, "Error opening xc interface: %d (%s)\n", + errno, strerror(errno)); + exit(-1); + } + +} + +void dom0_wrmsr( int cpu_mask, int msr, unsigned int low, unsigned int high ) +{ + xc_msr_write (xc_handle, cpu_mask, msr, low, high); +} + +unsigned long long dom0_rdmsr( int cpu_mask, int msr ) +{ + return xc_msr_read(xc_handle, cpu_mask, msr); +} + +// End of $RCSfile: cpuperf_xeno.h,v $ + diff --git a/tools/misc/cpuperf/module/Makefile b/tools/misc/cpuperf/module/Makefile new file mode 100644 index 0000000000..0a1c976ef4 --- /dev/null +++ b/tools/misc/cpuperf/module/Makefile @@ -0,0 +1,16 @@ +############################################################################# +# (C) 2005 - Rolf Neugebauer - Intel Research Cambridge +############################################################################# +# +# File: Makefile +# Author: Rolf Neugebauer (rolf.neugebauer@intel.com) +# Date: Mar 2005 +# +# Environment: +# + +# invoke: +# make -C /lib/modules/`uname -r`/build SUBDIRS=`pwd` modules_install + +obj-m := perfcntr.o + diff --git a/tools/misc/cpuperf/module/perfcntr.c b/tools/misc/cpuperf/module/perfcntr.c new file mode 100644 index 0000000000..6a8f48c938 --- /dev/null +++ b/tools/misc/cpuperf/module/perfcntr.c @@ -0,0 +1,730 @@ +/* + * Linux loadable kernel module to use P4 performance counters. + * + * James Bulpin, Feb 2003. 
+ * + * $Id$ + * + * $Log$ + */ + +#define DRV_NAME "perfcntr" +#define DRV_VERSION "0.2" +#define DRV_RELDATE "02 Jun 2004" + + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define NOHT + +#include "../p4perf.h" + +#ifdef NOHT +# define CPUMASK 0x00000003 +#else +# define CPUMASK 0x00000005 +#endif + +/***************************************************************************** + * Module admin * + *****************************************************************************/ + +MODULE_AUTHOR("James Bulpin "); +MODULE_DESCRIPTION("P4 Performance Counters access " + DRV_VERSION " " DRV_RELDATE); +MODULE_LICENSE("GPL"); + +static char version[] __devinitdata = +DRV_NAME ": James Bulpin.\n"; + +static unsigned char foobar[4]; + +/* rpcc: get full 64-bit Pentium TSC value + */ +static __inline__ unsigned long long int rpcc(void) +{ + unsigned int __h, __l; + __asm__ __volatile__ ("rdtsc" :"=a" (__l), "=d" (__h)); + return (((unsigned long long)__h) << 32) + __l; +} + +/***************************************************************************** + * Display the counters * + *****************************************************************************/ + +//#define processor cpu // post 2.4.16 +/* One counter value: cnt is the 64-bit view of the two 32-bit halves (lo, hi) that are filled in from rdmsr. */ +typedef union { + struct { + unsigned int lo; /* 32-bit, so lo/hi exactly overlay cnt even where long is 64-bit */ + unsigned int hi; + }; + unsigned long long cnt; +} cpu_perfcntr_t; + +typedef struct counters_t_struct { + int processor; + unsigned long long tsc; + cpu_perfcntr_t counters[18]; +} counters_t; + +typedef struct perfcntr_t_struct { + unsigned long cpu_mask; + counters_t cpus[4]; // Actually for each cpu in system +} perfcntr_t; + +#ifdef HUMAN_READABLE +# define SHOW_COUNTER(c) rdmsr (c, l, h);\ + seq_printf(m, "0x%03x: 0x%08x%08x\n", c, h, l) +#else +# define SHOW_COUNTER(c) rdmsr (c, l, h);\ + seq_printf(m, " %llu", \ + (unsigned long long)h << 32 | (unsigned long long)l) +#endif + +#if 0 +static unsigned long last_l = 0, last_h = 0, last_msr = 0; +static int
last_cpu = 0; +#endif + +#define READ_COUNTER(_i, _msr) rdmsr((_msr), l, h); c->counters[_i].lo = l; \ + c->counters[_i].hi = h; + +static perfcntr_t perfcntrs; + +static void show_perfcntr_for(void *v) +{ + unsigned int l, h; + + perfcntr_t *p = &perfcntrs; + counters_t *c; + + if (!((1 << smp_processor_id()) & p->cpu_mask)) + return; + + c = &p->cpus[smp_processor_id()]; + + c->processor = smp_processor_id(); + c->tsc = rpcc(); + + READ_COUNTER(0, MSR_P4_BPU_COUNTER0); + READ_COUNTER(1, MSR_P4_BPU_COUNTER1); + READ_COUNTER(2, MSR_P4_BPU_COUNTER2); + READ_COUNTER(3, MSR_P4_BPU_COUNTER3); + + READ_COUNTER(4, MSR_P4_MS_COUNTER0); + READ_COUNTER(5, MSR_P4_MS_COUNTER1); + READ_COUNTER(6, MSR_P4_MS_COUNTER2); + READ_COUNTER(7, MSR_P4_MS_COUNTER3); + + READ_COUNTER(8, MSR_P4_FLAME_COUNTER0); + READ_COUNTER(9, MSR_P4_FLAME_COUNTER1); + READ_COUNTER(10, MSR_P4_FLAME_COUNTER2); + READ_COUNTER(11, MSR_P4_FLAME_COUNTER3); + + READ_COUNTER(12, MSR_P4_IQ_COUNTER0); + READ_COUNTER(13, MSR_P4_IQ_COUNTER1); + READ_COUNTER(14, MSR_P4_IQ_COUNTER2); + READ_COUNTER(15, MSR_P4_IQ_COUNTER3); + READ_COUNTER(16, MSR_P4_IQ_COUNTER4); + READ_COUNTER(17, MSR_P4_IQ_COUNTER5); + + return; +} + +static int show_perfcntr(struct seq_file *m, void *v) +{ + int i, j; + + // Get each physical cpu to read counters + perfcntrs.cpu_mask = CPUMASK; + + smp_call_function(show_perfcntr_for, NULL, 1, 1); + show_perfcntr_for(NULL); + + for (i = 0; i < 32; i++) { + if (((1 << i) & (perfcntrs.cpu_mask = CPUMASK))) { + counters_t *c = &perfcntrs.cpus[i]; + seq_printf(m, "%u %llu", c->processor, c->tsc); + for (j = 0; j < 18; j++) { + seq_printf(m, " %llu", c->counters[j].cnt); + } + seq_printf(m, "\n"); + } + } + +#if 0 + unsigned long long t; + unsigned int l, h; + + t = rpcc(); + + + +#ifdef HUMAN_READABLE + seq_printf(m, + "show_perfcntr\nprocessor: %u\ntime: %llu\n" + "last write: 0x%08lx%08lx -> 0x%lx (CPU%u)\n", + smp_processor_id(), + t, + last_h, + last_l, + last_msr, + last_cpu); +#else + 
seq_printf(m, "%u %llu", smp_processor_id(), t); +#endif + + SHOW_COUNTER(MSR_P4_BPU_COUNTER0); + SHOW_COUNTER(MSR_P4_BPU_COUNTER1); + SHOW_COUNTER(MSR_P4_BPU_COUNTER2); + SHOW_COUNTER(MSR_P4_BPU_COUNTER3); + + SHOW_COUNTER(MSR_P4_MS_COUNTER0); + SHOW_COUNTER(MSR_P4_MS_COUNTER1); + SHOW_COUNTER(MSR_P4_MS_COUNTER2); + SHOW_COUNTER(MSR_P4_MS_COUNTER3); + + SHOW_COUNTER(MSR_P4_FLAME_COUNTER0); + SHOW_COUNTER(MSR_P4_FLAME_COUNTER1); + SHOW_COUNTER(MSR_P4_FLAME_COUNTER2); + SHOW_COUNTER(MSR_P4_FLAME_COUNTER3); + + SHOW_COUNTER(MSR_P4_IQ_COUNTER0); + SHOW_COUNTER(MSR_P4_IQ_COUNTER1); + SHOW_COUNTER(MSR_P4_IQ_COUNTER2); + SHOW_COUNTER(MSR_P4_IQ_COUNTER3); + SHOW_COUNTER(MSR_P4_IQ_COUNTER4); + SHOW_COUNTER(MSR_P4_IQ_COUNTER5); + +#ifndef HUMAN_READBLE + seq_printf(m, "\n"); +#endif + +#endif + + return 0; +} + +/***************************************************************************** + * Show counter configuration * + *****************************************************************************/ + +typedef union { + struct { + unsigned long lo; + unsigned long hi; + }; + unsigned long long cnt; +} cpu_perfcfg_t; + +typedef struct configs_t_struct { + int processor; + unsigned long long tsc; + cpu_perfcfg_t cccr[18]; + cpu_perfcfg_t escr[0x42]; +} configs_t; + +typedef struct perfcfg_t_struct { + unsigned long cpu_mask; + configs_t cpus[4]; // Actually for each cpu in system +} perfcfg_t; + +static perfcfg_t perfcfgs; + +#define READ_CCCR(_i, _msr) rdmsr((_msr), l, h); c->cccr[_i].lo = l; \ + c->cccr[_i].hi = h; +#define READ_ESCR(_i, _msr) rdmsr((_msr), l, h); c->escr[_i].lo = l; \ + c->escr[_i].hi = h; + +static void show_perfcfg_for(void *v) +{ + unsigned int l, h; + + perfcfg_t *p = &perfcfgs; + configs_t *c; + + if (!((1 << smp_processor_id()) & p->cpu_mask)) + return; + + c = &p->cpus[smp_processor_id()]; + + c->processor = smp_processor_id(); + c->tsc = rpcc(); + + READ_CCCR(0, MSR_P4_BPU_CCCR0); + READ_CCCR(1, MSR_P4_BPU_CCCR1); + READ_CCCR(2, 
MSR_P4_BPU_CCCR2); + READ_CCCR(3, MSR_P4_BPU_CCCR3); + + READ_CCCR(4, MSR_P4_MS_CCCR0); + READ_CCCR(5, MSR_P4_MS_CCCR1); + READ_CCCR(6, MSR_P4_MS_CCCR2); + READ_CCCR(7, MSR_P4_MS_CCCR3); + + READ_CCCR(8, MSR_P4_FLAME_CCCR0); + READ_CCCR(9, MSR_P4_FLAME_CCCR1); + READ_CCCR(10, MSR_P4_FLAME_CCCR2); + READ_CCCR(11, MSR_P4_FLAME_CCCR3); + + READ_CCCR(12, MSR_P4_IQ_CCCR0); + READ_CCCR(13, MSR_P4_IQ_CCCR1); + READ_CCCR(14, MSR_P4_IQ_CCCR2); + READ_CCCR(15, MSR_P4_IQ_CCCR3); + READ_CCCR(16, MSR_P4_IQ_CCCR4); + READ_CCCR(17, MSR_P4_IQ_CCCR5); + + READ_ESCR(0x00, MSR_P4_BSU_ESCR0); + READ_ESCR(0x02, MSR_P4_FSB_ESCR0); + READ_ESCR(0x0a, MSR_P4_MOB_ESCR0); + READ_ESCR(0x0c, MSR_P4_PMH_ESCR0); + READ_ESCR(0x12, MSR_P4_BPU_ESCR0); + READ_ESCR(0x14, MSR_P4_IS_ESCR0); + READ_ESCR(0x16, MSR_P4_ITLB_ESCR0); + READ_ESCR(0x28, MSR_P4_IX_ESCR0); + READ_ESCR(0x01, MSR_P4_BSU_ESCR1); + READ_ESCR(0x03, MSR_P4_FSB_ESCR1); + READ_ESCR(0x0b, MSR_P4_MOB_ESCR1); + READ_ESCR(0x0d, MSR_P4_PMH_ESCR1); + READ_ESCR(0x13, MSR_P4_BPU_ESCR1); + READ_ESCR(0x15, MSR_P4_IS_ESCR1); + READ_ESCR(0x17, MSR_P4_ITLB_ESCR1); + READ_ESCR(0x29, MSR_P4_IX_ESCR1); + READ_ESCR(0x20, MSR_P4_MS_ESCR0); + READ_ESCR(0x22, MSR_P4_TBPU_ESCR0); + READ_ESCR(0x24, MSR_P4_TC_ESCR0); + READ_ESCR(0x21, MSR_P4_MS_ESCR1); + READ_ESCR(0x23, MSR_P4_TBPU_ESCR1); + READ_ESCR(0x25, MSR_P4_TC_ESCR1); + READ_ESCR(0x04, MSR_P4_FIRM_ESCR0); + READ_ESCR(0x06, MSR_P4_FLAME_ESCR0); + READ_ESCR(0x08, MSR_P4_DAC_ESCR0); + READ_ESCR(0x0e, MSR_P4_SAAT_ESCR0); + READ_ESCR(0x10, MSR_P4_U2L_ESCR0); + READ_ESCR(0x05, MSR_P4_FIRM_ESCR1); + READ_ESCR(0x07, MSR_P4_FLAME_ESCR1); + READ_ESCR(0x09, MSR_P4_DAC_ESCR1); + READ_ESCR(0x0f, MSR_P4_SAAT_ESCR1); + READ_ESCR(0x11, MSR_P4_U2L_ESCR1); + READ_ESCR(0x18, MSR_P4_CRU_ESCR0); + READ_ESCR(0x2c, MSR_P4_CRU_ESCR2); + READ_ESCR(0x40, MSR_P4_CRU_ESCR4); + READ_ESCR(0x1a, MSR_P4_IQ_ESCR0); + READ_ESCR(0x1c, MSR_P4_RAT_ESCR0); + READ_ESCR(0x1e, MSR_P4_SSU_ESCR0); + READ_ESCR(0x2a, MSR_P4_ALF_ESCR0); + 
READ_ESCR(0x19, MSR_P4_CRU_ESCR1);
+    READ_ESCR(0x2d, MSR_P4_CRU_ESCR3);
+    READ_ESCR(0x41, MSR_P4_CRU_ESCR5);
+    READ_ESCR(0x1b, MSR_P4_IQ_ESCR1);
+    READ_ESCR(0x1d, MSR_P4_RAT_ESCR1);
+    READ_ESCR(0x2b, MSR_P4_ALF_ESCR1);
+
+    return;
+}
+
+/*
+ * Printable names of the ESCR MSRs, indexed by (MSR address - 0x3a0).
+ * The table covers 0x3a0..0x3e1 inclusive (66 entries); addresses that have
+ * no name in this table are spelt out as their hex address.
+ */
+static char *escr_names[] = {
+    "BSU_ESCR0",
+    "BSU_ESCR1",
+    "FSB_ESCR0",
+    "FSB_ESCR1",
+    "FIRM_ESCR0",
+    "FIRM_ESCR1",
+    "FLAME_ESCR0",
+    "FLAME_ESCR1",
+    "DAC_ESCR0",
+    "DAC_ESCR1",
+    "MOB_ESCR0",
+    "MOB_ESCR1",
+    "PMH_ESCR0",
+    "PMH_ESCR1",
+    "SAAT_ESCR0",
+    "SAAT_ESCR1",
+    "U2L_ESCR0",
+    "U2L_ESCR1",
+    "BPU_ESCR0",
+    "BPU_ESCR1",
+    "IS_ESCR0",
+    "IS_ESCR1",
+    "ITLB_ESCR0",
+    "ITLB_ESCR1",
+    "CRU_ESCR0",
+    "CRU_ESCR1",
+    "IQ_ESCR0",
+    "IQ_ESCR1",
+    "RAT_ESCR0",
+    "RAT_ESCR1",
+    "SSU_ESCR0",
+    "SSU_ESCR1",
+    "MS_ESCR0",
+    "MS_ESCR1",
+    "TBPU_ESCR0",
+    "TBPU_ESCR1",
+    "TC_ESCR0",
+    "TC_ESCR1",
+    "0x3c6",
+    "0x3c7",
+    "IX_ESCR0",
+    "IX_ESCR1",
+    "ALF_ESCR0",
+    "ALF_ESCR1",
+    "CRU_ESCR2",
+    "CRU_ESCR3",
+    "0x3ce",
+    "0x3cf",
+    "0x3d0",
+    "0x3d1",
+    "0x3d2",
+    "0x3d3",
+    "0x3d4",
+    "0x3d5",
+    "0x3d6",
+    "0x3d7",
+    "0x3d8",
+    "0x3d9",
+    "0x3da",
+    "0x3db",
+    "0x3dc",
+    "0x3dd",
+    "0x3de",
+    "0x3df",
+    "CRU_ESCR4",
+    "CRU_ESCR5"
+};
+
+/*
+ * ESCR MSR address tables, one per counter group.  Each table has exactly
+ * eight slots and is indexed by the 3-bit ESCR select value taken from the
+ * CCCR (bits 13..15); a 0 entry means "no ESCR for this select value".
+ */
+static unsigned long escr_map_0[] =
+    {MSR_P4_BPU_ESCR0, MSR_P4_IS_ESCR0,
+     MSR_P4_MOB_ESCR0, MSR_P4_ITLB_ESCR0,
+     MSR_P4_PMH_ESCR0, MSR_P4_IX_ESCR0,
+     MSR_P4_FSB_ESCR0, MSR_P4_BSU_ESCR0}; //BPU even
+static unsigned long escr_map_1[] =
+    {MSR_P4_BPU_ESCR1, MSR_P4_IS_ESCR1,
+     MSR_P4_MOB_ESCR1, MSR_P4_ITLB_ESCR1,
+     MSR_P4_PMH_ESCR1, MSR_P4_IX_ESCR1,
+     MSR_P4_FSB_ESCR1, MSR_P4_BSU_ESCR1}; //BPU odd
+static unsigned long escr_map_2[] =
+    {MSR_P4_MS_ESCR0, MSR_P4_TC_ESCR0, MSR_P4_TBPU_ESCR0,
+     0, 0, 0, 0, 0}; //MS even
+static unsigned long escr_map_3[] =
+    {MSR_P4_MS_ESCR1, MSR_P4_TC_ESCR1, MSR_P4_TBPU_ESCR1,
+     0, 0, 0, 0, 0}; //MS odd
+static unsigned long escr_map_4[] =
+    {MSR_P4_FLAME_ESCR0, MSR_P4_FIRM_ESCR0, MSR_P4_SAAT_ESCR0,
+     MSR_P4_U2L_ESCR0, 0, MSR_P4_DAC_ESCR0, 0, 0}; //FLAME even
+static unsigned long escr_map_5[] =
+    {MSR_P4_FLAME_ESCR1, MSR_P4_FIRM_ESCR1, MSR_P4_SAAT_ESCR1,
+     MSR_P4_U2L_ESCR1, 0, MSR_P4_DAC_ESCR1, 0, 0}; //FLAME odd
+static unsigned long escr_map_6[] =
+    {MSR_P4_IQ_ESCR0, MSR_P4_ALF_ESCR0,
+     MSR_P4_RAT_ESCR0, MSR_P4_SSU_ESCR0,
+     MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR4, 0}; //IQ even
+static unsigned long escr_map_7[] =
+    {MSR_P4_IQ_ESCR1, MSR_P4_ALF_ESCR1,
+     MSR_P4_RAT_ESCR1, 0,
+     MSR_P4_CRU_ESCR1, MSR_P4_CRU_ESCR3, MSR_P4_CRU_ESCR5, 0}; //IQ odd
+
+/* Table of the per-group tables above; indexed by the group chosen in
+ * get_escr_msr(). */
+static unsigned long *escr_map[] = {
+    escr_map_0,
+    escr_map_1,
+    escr_map_2,
+    escr_map_3,
+    escr_map_4,
+    escr_map_5,
+    escr_map_6,
+    escr_map_7,
+};
+
+/*
+ * Get the ESCR MSR address from the counter number and the ESCR number.
+ *
+ * c: counter number (one of the P4_*_COUNTERn_NUMBER values).
+ * e: ESCR select value (0..7), i.e. the slot in the group's table.
+ *
+ * Returns the MSR address, or 0 if the counter number is unknown, the
+ * select value is outside 0..7, or the slot is empty.
+ */
+unsigned long get_escr_msr(int c, int e)
+{
+    int index = -1;
+
+    switch (c) {
+    case P4_BPU_COUNTER0_NUMBER:
+    case P4_BPU_COUNTER1_NUMBER:
+        index = 0;
+        break;
+    case P4_BPU_COUNTER2_NUMBER:
+    case P4_BPU_COUNTER3_NUMBER:
+        index = 1;
+        break;
+    case P4_MS_COUNTER0_NUMBER:
+    case P4_MS_COUNTER1_NUMBER:
+        index = 2; // probably !
+        break;
+    case P4_MS_COUNTER2_NUMBER:
+    case P4_MS_COUNTER3_NUMBER:
+        index = 3; // probably !
+        break;
+    case P4_FLAME_COUNTER0_NUMBER:
+    case P4_FLAME_COUNTER1_NUMBER:
+        index = 4; // probably !
+        break;
+    case P4_FLAME_COUNTER2_NUMBER:
+    case P4_FLAME_COUNTER3_NUMBER:
+        index = 5; // probably !
+        break;
+    case P4_IQ_COUNTER0_NUMBER:
+    case P4_IQ_COUNTER1_NUMBER:
+    case P4_IQ_COUNTER4_NUMBER:
+        index = 6;
+        break;
+    case P4_IQ_COUNTER2_NUMBER:
+    case P4_IQ_COUNTER3_NUMBER:
+    case P4_IQ_COUNTER5_NUMBER:
+        index = 7;
+        break;
+    default:
+        break;
+    }
+
+    // Every escr_map_N[] has exactly 8 slots; refuse anything outside that
+    // range instead of reading past the end of the table.
+    if (index == -1 || e < 0 || e > 7)
+        return 0;
+
+    return escr_map[index][e];
+}
+
+static char null_string[] = "";
+
+/*
+ * Name of the ESCR selected by counter c and ESCR select value e, or an
+ * empty string when get_escr_msr() yields an address outside 0x3a0..0x3e1.
+ */
+static char *get_escr(int c, int e)
+{
+    unsigned long msr = get_escr_msr(c, e);
+
+    if ((msr >= 0x3a0) && (msr <= 0x3e1))
+        return escr_names[(int)(msr - 0x3a0)];
+    return null_string;
+}
+
+/*
+ * Print one CCCR of a CPU's configuration snapshot: the raw CCCR value and,
+ * if the counter is enabled, the selected ESCR (name, raw value, event
+ * select, event mask, thread qualifiers), the active-thread field and the
+ * CCCR flag bits.
+ *
+ * c: the per-CPU snapshot; j: counter/CCCR number (0..17).
+ */
+static void perfcfg_show_cccr(struct seq_file *m, configs_t *c, int j)
+{
+    unsigned long cccr = c->cccr[j].lo;
+    int sel = (int)((cccr >> 13) & 7);      // ESCR select field
+    unsigned long escr_msr;
+
+    seq_printf(m, "%08lx", cccr);
+
+    if (!(cccr & P4_CCCR_ENABLE)) {
+        seq_printf(m, " DISABLED");
+        seq_printf(m, "\n");
+        return;
+    }
+
+    // get_escr_msr()/get_escr() take the *counter* number, so pass j.
+    // (The code used to pass the CPU index here and decoded the wrong ESCR.)
+    escr_msr = get_escr_msr(j, sel);
+    seq_printf(m, " ESCR=%s", get_escr(j, sel));
+    if ((escr_msr >= 0x3a0) && (escr_msr <= 0x3e1)) {
+        unsigned long e = c->escr[(int)(escr_msr - 0x3a0)].lo;
+
+        seq_printf(m, "(%08lx es=%lx mask=%lx", e,
+                   (e >> 25) & 0x7f,
+                   (e >> 9) & 0xffff);
+        if ((e & P4_ESCR_T0_USR))
+            seq_printf(m, " T(0)USR");
+        if ((e & P4_ESCR_T0_OS))
+            seq_printf(m, " T(0)OS");
+        if ((e & P4_ESCR_T1_USR))
+            seq_printf(m, " T1USR");
+        if ((e & P4_ESCR_T1_OS))
+            seq_printf(m, " T1OS");
+        seq_printf(m, ")");
+    }
+    seq_printf(m, " AT=%u", (unsigned int)((cccr >> 16) & 3));
+
+    if ((cccr & P4_CCCR_OVF))
+        seq_printf(m, " OVF");
+    if ((cccr & P4_CCCR_CASCADE))
+        seq_printf(m, " CASC");
+    if ((cccr & P4_CCCR_FORCE_OVF))
+        seq_printf(m, " F-OVF");
+    if ((cccr & P4_CCCR_EDGE))
+        seq_printf(m, " EDGE");
+    if ((cccr & P4_CCCR_COMPLEMENT))
+        seq_printf(m, " COMPL");
+    if ((cccr & P4_CCCR_COMPARE))
+        seq_printf(m, " CMP");
+    if ((cccr & P4_CCCR_OVF_PMI_T0))
+        seq_printf(m, " OVF_PMI(_T0)");
+    if ((cccr & P4_CCCR_OVF_PMI_T1))
+        seq_printf(m, " OVF_PMI_T1");
+
+    seq_printf(m, "\n");
+}
+
+/*
+ * seq_file show handler for the config file: asks every CPU to snapshot its
+ * CCCR/ESCR configuration (show_perfcfg_for) and then prints, for each CPU
+ * selected by CPUMASK, a separator, "<processor> <tsc>" and the 18 CCCRs.
+ * Always returns 0.
+ */
+static int show_perfcfg(struct seq_file *m, void *v)
+{
+    int i, j;
+
+    // Get each physical cpu to read configs
+    perfcfgs.cpu_mask = CPUMASK;
+
+    smp_call_function(show_perfcfg_for, NULL, 1, 1);
+    show_perfcfg_for(NULL);
+
+    // The loop condition used to re-assign CPUMASK on every iteration;
+    // do that once here and only test the mask inside the loop.
+    perfcfgs.cpu_mask = CPUMASK;
+
+    for (i = 0; i < 32; i++) {
+        configs_t *c;
+
+        // 1U: shifting a signed 1 into bit 31 is undefined behaviour.
+        if (!((1U << i) & perfcfgs.cpu_mask))
+            continue;
+
+        c = &perfcfgs.cpus[i];
+        seq_printf(m, "----------------------------------------\n");
+        seq_printf(m, "%u %llu\n", c->processor, c->tsc);
+        for (j = 0; j < 18; j++)
+            perfcfg_show_cccr(m, c, j);
+    }
+
+    return 0;
+}
+
+/*****************************************************************************
+ * Handle writes                                                             *
+ *****************************************************************************/
+
+/* Parameters of the pending MSR write; filled in by perfcntr_write() and
+ * consumed by perfcntr_write_for() on each CPU. */
+static int set_msr_cpu_mask;
+static unsigned long set_msr_addr;
+static unsigned long set_msr_lo;
+static unsigned long set_msr_hi;
+
+/*
+ * Per-CPU half of a write: performs the wrmsr described by the set_msr_*
+ * variables.  When NOHT is defined the write is only done on CPUs whose bit
+ * is set in set_msr_cpu_mask; otherwise it is done on every CPU that runs
+ * this function.
+ */
+static void perfcntr_write_for(void *unused)
+{
+#ifdef NOHT
+    if (((1 << smp_processor_id()) & set_msr_cpu_mask)) {
+#endif
+        wrmsr(set_msr_addr, set_msr_lo, set_msr_hi);
+#ifdef NOHT
+    }
+#endif
+}
+
+/*
+ * write() handler shared by both /proc files.  Expects three hexadecimal
+ * fields separated by a single character: "<cpu_mask> <msr> <value>".
+ * The value is split into low/high 32-bit halves and written to the MSR on
+ * the other CPUs (smp_call_function) and then on the current one.
+ *
+ * Returns size on success, -EINVAL if the buffer ends before the second or
+ * third field.
+ *
+ * NOTE(review): `data` comes straight from the write file operation, i.e. it
+ * is a user-space buffer that is neither copied with copy_from_user() nor
+ * guaranteed to be NUL-terminated within `size` bytes.  It should be copied
+ * into a bounded kernel buffer before parsing; not done here because the
+ * file's include list is not visible from this hunk.
+ */
+ssize_t perfcntr_write(struct file *f,
+                       const char *data,
+                       size_t size,
+                       loff_t *pos)
+{
+    char *endp;
+    unsigned long long v;
+
+    set_msr_cpu_mask = (int)simple_strtoul(data, &endp, 16);
+    endp++; // skip past space
+    if ((endp - data) >= size)
+        return -EINVAL;
+
+    set_msr_addr = simple_strtoul(endp, &endp, 16);
+    endp++; // skip past space
+    if ((endp - data) >= size)
+        return -EINVAL;
+
+    // Parse the full 64-bit value: simple_strtoul() only yields an
+    // unsigned long, which loses the high word where long is 32 bits.
+    v = simple_strtoull(endp, &endp, 16);
+    set_msr_lo = (unsigned long)(v & 0xffffffffULL);
+    set_msr_hi = (unsigned long)(v >> 32);
+
+    smp_call_function(perfcntr_write_for, NULL, 1, 1);
+    perfcntr_write_for(NULL);
+
+    return size;
+}
+
+/*****************************************************************************
+ * /proc stuff                                                               *
+ *****************************************************************************/
+
+/*
+ * seq_file iterator: there is exactly one record.  Position 0 yields the
+ * token `foobar`; every other position ends the sequence.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+    return *pos == 0 ? foobar : NULL;
+}
+
+/* Advance the position and re-evaluate it with c_start(). */
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+    ++*pos;
+    return c_start(m, pos);
+}
+
+/* Nothing to release. */
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations perfcntr_op = {
+    .start = c_start,
+    .next  = c_next,
+    .stop  = c_stop,
+    .show  = show_perfcntr,
+};
+
+struct seq_operations perfcfg_op = {
+    .start = c_start,
+    .next  = c_next,
+    .stop  = c_stop,
+    .show  = show_perfcfg,
+};
+
+static int perfcntr_open(struct inode *inode, struct file *file)
+{
+    return seq_open(file, &perfcntr_op);
+}
+
+static int perfcfg_open(struct inode *inode, struct file *file)
+{
+    return seq_open(file, &perfcfg_op);
+}
+
+/* Both files are read through seq_file and share perfcntr_write() for
+ * writes. */
+static struct file_operations proc_perfcntr_operations = {
+    .open    = perfcntr_open,
+    .read    = seq_read,
+    .write   = perfcntr_write,
+    .llseek  = seq_lseek,
+    .release = seq_release,
+};
+
+static struct file_operations proc_perfcfg_operations = {
+    .open    = perfcfg_open,
+    .read    = seq_read,
+    .write   = perfcntr_write,
+    .llseek  = seq_lseek,
+    .release = seq_release,
+};
+
+/*
+ * Create /proc/<name> with the given mode and attach the file operations.
+ * A failed creation is reported in the kernel log; the caller is not told.
+ */
+static void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
+{
+    struct proc_dir_entry *entry;
+
+    entry = create_proc_entry(name, mode, NULL);
+    if (entry)
+        entry->proc_fops = f;
+    else
+        printk(KERN_WARNING "perfcntr: failed to create /proc/%s\n", name);
+}
+
+/*****************************************************************************
+ * Module init and cleanup                                                   *
+ *****************************************************************************/
+
+/*
+ * Module entry point: prints the version banner and creates the two /proc
+ * files.  Always returns 0.
+ *
+ * NOTE(review): mode 0777 lets any local user reach perfcntr_write(), i.e.
+ * write arbitrary MSRs.  Consider restricting write access to root once it
+ * is confirmed that the userspace tool does not rely on the current mode.
+ */
+static int __init perfcntr_init(void)
+{
+    printk(version);
+
+    create_seq_entry("perfcntr", 0777, &proc_perfcntr_operations);
+    create_seq_entry("perfcntr_config", 0777, &proc_perfcfg_operations);
+
+    return 0;
+}
+
+/* Module exit point: removes the two /proc files created by perfcntr_init(). */
+static void __exit perfcntr_exit(void)
+{
+    remove_proc_entry("perfcntr", NULL);
+    remove_proc_entry("perfcntr_config", NULL);
+}
+
+module_init(perfcntr_init);
+module_exit(perfcntr_exit);
+
+/* End of $RCSfile$ */
diff --git a/tools/misc/cpuperf/p4perf.h b/tools/misc/cpuperf/p4perf.h
new file mode 100644
index 0000000000..04eef39b3c
--- /dev/null
+++ b/tools/misc/cpuperf/p4perf.h
@@ -0,0 +1,382 @@
+/*
+ * P4 Performance counter stuff.
+ *
+ * P4 Xeon with Hyperthreading has counters per physical package which can
+ * count events from either logical CPU. However, in many cases more than
+ * one ESCR and CCCR/counter can be used to count the same event. For instr
+ * or uops retired, use either ESCR0/IQ_CCCR0 or ESCR1/IQ_CCCR2.
+ *
+ * $Id: p4perf.h,v 1.2 2003/10/13 16:51:41 jrb44 Exp $
+ *
+ * $Log: p4perf.h,v $
+ * Revision 1.2 2003/10/13 16:51:41 jrb44
+ * *** empty log message ***
+ *
+ */
+
+#ifndef P4PERF_H
+#define P4PERF_H
+
+#ifdef __KERNEL__
+/* NOTE(review): the header name was missing from this #include line;
+ * <asm/msr.h> is presumed to be the intended header - confirm. */
+#include <asm/msr.h>
+#endif
+
+/*****************************************************************************
+ * Performance counter configuration.                                        *
+ *****************************************************************************/
+
+/* P6-style event select bits; only defined if not already provided. */
+#ifndef P6_EVNTSEL_OS
+# define P6_EVNTSEL_OS                  (1 << 17)
+# define P6_EVNTSEL_USR                 (1 << 16)
+# define P6_EVNTSEL_E                   (1 << 18)
+# define P6_EVNTSEL_EN                  (1 << 22)
+#endif
+#define P6_PERF_INST_RETIRED            0xc0
+#define P6_PERF_UOPS_RETIRED            0xc2
+
+/*
+ * ESCR bits and field builders.  The function-like macros parenthesize
+ * their argument so that expressions such as (a | b) shift as a whole.
+ */
+#define P4_ESCR_USR                     (1 << 2)
+#define P4_ESCR_OS                      (1 << 3)
+#define P4_ESCR_T0_USR                  (1 << 2) /* First logical CPU  */
+#define P4_ESCR_T0_OS                   (1 << 3)
+#define P4_ESCR_T1_USR                  (1 << 0) /* Second logical CPU */
+#define P4_ESCR_T1_OS                   (1 << 1)
+#define P4_ESCR_TE                      (1 << 4)
+#define P4_ESCR_THREADS(t)              (t)
+#define P4_ESCR_TV(tag)                 ((tag) << 5)
+#define P4_ESCR_EVNTSEL(e)              ((e) << 25)
+#define P4_ESCR_EVNTMASK(e)             ((e) << 9)
+
+#define P4_ESCR_EVNTSEL_FRONT_END       0x08
+#define P4_ESCR_EVNTSEL_EXECUTION       0x0c
+#define P4_ESCR_EVNTSEL_REPLAY          0x09
+#define P4_ESCR_EVNTSEL_INSTR_RETIRED   0x02
+#define P4_ESCR_EVNTSEL_UOPS_RETIRED    0x01
+#define P4_ESCR_EVNTSEL_UOP_TYPE        0x02
+#define P4_ESCR_EVNTSEL_RET_MBR_TYPE    0x05
+//#define P4_ESCR_EVNTSEL_RET_MBR_TYPE    0x04
+
+#define P4_ESCR_EVNTMASK_FE_NBOGUS      0x01
+#define P4_ESCR_EVNTMASK_FE_BOGUS       0x02
+
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS0   0x01
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS1   0x02
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS2   0x04
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS3   0x08
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS0    0x10
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS1    0x20
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS2    0x40
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS3    0x80
+
+#define P4_ESCR_EVNTMASK_REPLAY_NBOGUS  0x01
+#define P4_ESCR_EVNTMASK_REPLAY_BOGUS   0x02
+
+#define P4_ESCR_EVNTMASK_IRET_NB_NTAG   0x01
+#define P4_ESCR_EVNTMASK_IRET_NB_TAG    0x02
+#define P4_ESCR_EVNTMASK_IRET_B_NTAG    0x04
+#define P4_ESCR_EVNTMASK_IRET_B_TAG     0x08
+
+#define P4_ESCR_EVNTMASK_URET_NBOGUS    0x01
+#define P4_ESCR_EVNTMASK_URET_BOGUS     0x02
+
+#define P4_ESCR_EVNTMASK_UOP_LOADS      0x02
+#define P4_ESCR_EVNTMASK_UOP_STORES     0x04
+
+#define P4_ESCR_EVNTMASK_RMBRT_COND     0x02
+#define P4_ESCR_EVNTMASK_RMBRT_CALL     0x04
+#define P4_ESCR_EVNTMASK_RMBRT_RETURN   0x08
+#define P4_ESCR_EVNTMASK_RMBRT_INDIR    0x10
+
+#define P4_ESCR_EVNTMASK_RBRT_COND      0x02
+#define P4_ESCR_EVNTMASK_RBRT_CALL      0x04
+#define P4_ESCR_EVNTMASK_RBRT_RETURN    0x08
+#define P4_ESCR_EVNTMASK_RBRT_INDIR     0x10
+
+//#define P4_ESCR_EVNTMASK_INSTR_RETIRED 0x01 /* Non bogus, not tagged */
+//#define P4_ESCR_EVNTMASK_UOPS_RETIRED  0x01 /* Non bogus */
+
+/*
+ * CCCR bits and field builders.
+ * P4_CCCR_OVF uses an unsigned constant: shifting a signed 1 into bit 31
+ * is undefined behaviour in C.
+ */
+#define P4_CCCR_OVF                     (1U << 31)
+#define P4_CCCR_CASCADE                 (1 << 30)
+#define P4_CCCR_FORCE_OVF               (1 << 25)
+#define P4_CCCR_EDGE                    (1 << 24)
+#define P4_CCCR_COMPLEMENT              (1 << 19)
+#define P4_CCCR_COMPARE                 (1 << 18)
+#define P4_CCCR_THRESHOLD(t)            ((t) << 20)
+#define P4_CCCR_ENABLE                  (1 << 12)
+#define P4_CCCR_ESCR(escr)              ((escr) << 13)
+#define P4_CCCR_ACTIVE_THREAD(t)        ((t) << 16) /* Set to 11 */
+#define P4_CCCR_OVF_PMI_T0              (1 << 26)
+#define P4_CCCR_OVF_PMI_T1              (1 << 27)
+#define P4_CCCR_RESERVED                (3 << 16)
+#define P4_CCCR_OVF_PMI                 (1 << 26)
+
+// BPU
+#define MSR_P4_BPU_COUNTER0             0x300
+#define MSR_P4_BPU_COUNTER1             0x301
+#define MSR_P4_BPU_CCCR0                0x360
+#define MSR_P4_BPU_CCCR1                0x361
+
+#define MSR_P4_BPU_COUNTER2             0x302
+#define MSR_P4_BPU_COUNTER3             0x303
+#define MSR_P4_BPU_CCCR2                0x362
+#define MSR_P4_BPU_CCCR3                0x363
+
+#define MSR_P4_BSU_ESCR0                0x3a0
+#define MSR_P4_FSB_ESCR0                0x3a2
+#define MSR_P4_MOB_ESCR0                0x3aa
+#define MSR_P4_PMH_ESCR0                0x3ac
+#define MSR_P4_BPU_ESCR0                0x3b2
+#define MSR_P4_IS_ESCR0                 0x3b4
+#define MSR_P4_ITLB_ESCR0               0x3b6
+#define MSR_P4_IX_ESCR0                 0x3c8
+
+/* The *_ESCRn_NUMBER values are ESCR select numbers, i.e. the argument
+ * that P4_CCCR_ESCR() takes to pick that ESCR. */
+#define P4_BSU_ESCR0_NUMBER             7
+#define P4_FSB_ESCR0_NUMBER             6
+#define P4_MOB_ESCR0_NUMBER             2
+#define P4_PMH_ESCR0_NUMBER             4
+#define P4_BPU_ESCR0_NUMBER             0
+#define P4_IS_ESCR0_NUMBER              1
+#define P4_ITLB_ESCR0_NUMBER            3
+#define P4_IX_ESCR0_NUMBER              5
+
+#define MSR_P4_BSU_ESCR1                0x3a1
+#define MSR_P4_FSB_ESCR1                0x3a3
+#define MSR_P4_MOB_ESCR1                0x3ab
+#define MSR_P4_PMH_ESCR1                0x3ad
+#define MSR_P4_BPU_ESCR1                0x3b3
+#define MSR_P4_IS_ESCR1                 0x3b5
+#define MSR_P4_ITLB_ESCR1               0x3b7
+#define MSR_P4_IX_ESCR1                 0x3c9
+
+#define P4_BSU_ESCR1_NUMBER             7
+#define P4_FSB_ESCR1_NUMBER             6
+#define P4_MOB_ESCR1_NUMBER             2
+#define P4_PMH_ESCR1_NUMBER             4
+#define P4_BPU_ESCR1_NUMBER             0
+#define P4_IS_ESCR1_NUMBER              1
+#define P4_ITLB_ESCR1_NUMBER            3
+#define P4_IX_ESCR1_NUMBER              5
+
+// MS
+#define MSR_P4_MS_COUNTER0              0x304
+#define MSR_P4_MS_COUNTER1              0x305
+#define MSR_P4_MS_CCCR0                 0x364
+#define MSR_P4_MS_CCCR1                 0x365
+
+#define MSR_P4_MS_COUNTER2              0x306
+#define MSR_P4_MS_COUNTER3              0x307
+#define MSR_P4_MS_CCCR2                 0x366
+#define MSR_P4_MS_CCCR3                 0x367
+
+#define MSR_P4_MS_ESCR0                 0x3c0
+#define MSR_P4_TBPU_ESCR0               0x3c2
+#define MSR_P4_TC_ESCR0                 0x3c4
+
+#define P4_MS_ESCR0_NUMBER              0
+#define P4_TBPU_ESCR0_NUMBER            2
+#define P4_TC_ESCR0_NUMBER              1
+
+#define MSR_P4_MS_ESCR1                 0x3c1
+#define MSR_P4_TBPU_ESCR1               0x3c3
+#define MSR_P4_TC_ESCR1                 0x3c5
+
+#define P4_MS_ESCR1_NUMBER              0
+#define P4_TBPU_ESCR1_NUMBER            2
+#define P4_TC_ESCR1_NUMBER              1
+
+// FLAME
+#define MSR_P4_FLAME_COUNTER0           0x308
+#define MSR_P4_FLAME_COUNTER1           0x309
+#define MSR_P4_FLAME_CCCR0              0x368
+#define MSR_P4_FLAME_CCCR1              0x369
+
+#define MSR_P4_FLAME_COUNTER2           0x30a
+#define MSR_P4_FLAME_COUNTER3           0x30b
+#define MSR_P4_FLAME_CCCR2              0x36a
+#define MSR_P4_FLAME_CCCR3              0x36b
+
+#define MSR_P4_FIRM_ESCR0               0x3a4
+#define MSR_P4_FLAME_ESCR0              0x3a6
+#define MSR_P4_DAC_ESCR0                0x3a8
+#define MSR_P4_SAAT_ESCR0               0x3ae
+#define MSR_P4_U2L_ESCR0                0x3b0
+
+#define P4_FIRM_ESCR0_NUMBER            1
+#define P4_FLAME_ESCR0_NUMBER           0
+#define P4_DAC_ESCR0_NUMBER             5
+#define P4_SAAT_ESCR0_NUMBER            2
+#define P4_U2L_ESCR0_NUMBER             3
+
+#define MSR_P4_FIRM_ESCR1               0x3a5
+#define MSR_P4_FLAME_ESCR1              0x3a7
+#define MSR_P4_DAC_ESCR1                0x3a9
+#define MSR_P4_SAAT_ESCR1               0x3af
+#define MSR_P4_U2L_ESCR1                0x3b1
+
+#define P4_FIRM_ESCR1_NUMBER            1
+#define P4_FLAME_ESCR1_NUMBER           0
+#define P4_DAC_ESCR1_NUMBER             5
+#define P4_SAAT_ESCR1_NUMBER            2
+#define P4_U2L_ESCR1_NUMBER             3
+
+// IQ
+#define MSR_P4_IQ_COUNTER0              0x30c
+#define MSR_P4_IQ_COUNTER1              0x30d
+#define MSR_P4_IQ_CCCR0                 0x36c
+#define MSR_P4_IQ_CCCR1                 0x36d
+
+#define MSR_P4_IQ_COUNTER2              0x30e
+#define MSR_P4_IQ_COUNTER3              0x30f
+#define MSR_P4_IQ_CCCR2                 0x36e
+#define MSR_P4_IQ_CCCR3                 0x36f
+
+#define MSR_P4_IQ_COUNTER4              0x310
+#define MSR_P4_IQ_COUNTER5              0x311
+#define MSR_P4_IQ_CCCR4                 0x370
+#define MSR_P4_IQ_CCCR5                 0x371
+
+#define MSR_P4_CRU_ESCR0                0x3b8
+#define MSR_P4_CRU_ESCR2                0x3cc
+#define MSR_P4_CRU_ESCR4                0x3e0
+#define MSR_P4_IQ_ESCR0                 0x3ba
+#define MSR_P4_RAT_ESCR0                0x3bc
+#define MSR_P4_SSU_ESCR0                0x3be
+#define MSR_P4_ALF_ESCR0                0x3ca
+
+#define P4_CRU_ESCR0_NUMBER             4
+#define P4_CRU_ESCR2_NUMBER             5
+#define P4_CRU_ESCR4_NUMBER             6
+#define P4_IQ_ESCR0_NUMBER              0
+#define P4_RAT_ESCR0_NUMBER             2
+#define P4_SSU_ESCR0_NUMBER             3
+#define P4_ALF_ESCR0_NUMBER             1
+
+#define MSR_P4_CRU_ESCR1                0x3b9
+#define MSR_P4_CRU_ESCR3                0x3cd
+#define MSR_P4_CRU_ESCR5                0x3e1
+#define MSR_P4_IQ_ESCR1                 0x3bb
+#define MSR_P4_RAT_ESCR1                0x3bd
+#define MSR_P4_ALF_ESCR1                0x3cb
+
+#define P4_CRU_ESCR1_NUMBER             4
+#define P4_CRU_ESCR3_NUMBER             5
+#define P4_CRU_ESCR5_NUMBER             6
+#define P4_IQ_ESCR1_NUMBER              0
+#define P4_RAT_ESCR1_NUMBER             2
+#define P4_ALF_ESCR1_NUMBER             1
+
+/* Counter numbers: 0..17, in the same order as the counter MSRs
+ * 0x300..0x311 defined above. */
+#define P4_BPU_COUNTER0_NUMBER          0
+#define P4_BPU_COUNTER1_NUMBER          1
+#define P4_BPU_COUNTER2_NUMBER          2
+#define P4_BPU_COUNTER3_NUMBER          3
+
+#define P4_MS_COUNTER0_NUMBER           4
+#define P4_MS_COUNTER1_NUMBER           5
+#define P4_MS_COUNTER2_NUMBER           6
+#define P4_MS_COUNTER3_NUMBER           7
+
+#define P4_FLAME_COUNTER0_NUMBER        8
+#define P4_FLAME_COUNTER1_NUMBER        9
+#define P4_FLAME_COUNTER2_NUMBER        10
+#define P4_FLAME_COUNTER3_NUMBER        11
+
+#define P4_IQ_COUNTER0_NUMBER           12
+#define P4_IQ_COUNTER1_NUMBER           13
+#define P4_IQ_COUNTER2_NUMBER           14
+#define P4_IQ_COUNTER3_NUMBER           15
+#define P4_IQ_COUNTER4_NUMBER           16
+#define P4_IQ_COUNTER5_NUMBER           17
+
+/* PEBS
+ */
+#define MSR_P4_PEBS_ENABLE              0x3F1
+#define MSR_P4_PEBS_MATRIX_VERT         0x3F2
+
+#define P4_PEBS_ENABLE_MY_THR           (1 << 25)
+#define P4_PEBS_ENABLE_OTH_THR          (1 << 26)
+#define P4_PEBS_ENABLE                  (1 << 24)
+#define P4_PEBS_BIT0                    (1 << 0)
+#define P4_PEBS_BIT1                    (1 << 1)
+#define P4_PEBS_BIT2                    (1 << 2)
+
+#define P4_PEBS_MATRIX_VERT_BIT0        (1 << 0)
+#define P4_PEBS_MATRIX_VERT_BIT1        (1 << 1)
+#define P4_PEBS_MATRIX_VERT_BIT2        (1 << 2)
+
+/* Replay tagging.
+ */
+#define P4_REPLAY_TAGGING_PEBS_L1LMR    P4_PEBS_BIT0
+#define P4_REPLAY_TAGGING_PEBS_L2LMR    P4_PEBS_BIT1
+#define P4_REPLAY_TAGGING_PEBS_DTLMR    P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTSMR    P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTAMR    P4_PEBS_BIT2
+
+#define P4_REPLAY_TAGGING_VERT_L1LMR    P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_L2LMR    P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTLMR    P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTSMR    P4_PEBS_MATRIX_VERT_BIT1
+/* Parenthesized: without the parentheses `x & P4_..._DTAMR` would bind the
+ * `&` to BIT0 only, because `&` has higher precedence than `|`. */
+#define P4_REPLAY_TAGGING_VERT_DTAMR    (P4_PEBS_MATRIX_VERT_BIT0 | P4_PEBS_MATRIX_VERT_BIT1)
+
+/*****************************************************************************
+ * Event selects and event masks                                             *
+ *****************************************************************************/
+
+// x87_FP_uop
+#define EVENT_SEL_x87_FP_uop                    0x04
+#define EVENT_MASK_x87_FP_uop_ALL               (1 << 15)
+
+// execution event (at retirement)
+#define EVENT_SEL_execution_event               0x0C
+
+// scalar_SP_uop
+#define EVENT_SEL_scalar_SP_uop                 0x0a
+#define EVENT_MASK_scalar_SP_uop_ALL            (1 << 15)
+
+// scalar_DP_uop
+#define EVENT_SEL_scalar_DP_uop                 0x0e
+#define EVENT_MASK_scalar_DP_uop_ALL            (1 << 15)
+
+// Instruction retired
+#define EVENT_SEL_instr_retired                 0x02
+#define EVENT_MASK_instr_retired_ALL            0x0f
+
+// uOps retired
+#define EVENT_SEL_uops_retired                  0x01
+#define EVENT_MASK_uops_retired_ALL             0x03
+
+// L1 misses retired
+#define EVENT_SEL_replay_event                  0x09
+#define EVENT_MASK_replay_event_ALL             0x03
+
+// Trace cache
+#define EVENT_SEL_BPU_fetch_request             0x03
+#define EVENT_MASK_BPU_fetch_request_TCMISS     0x01
+
+// Bus activity
+#define EVENT_SEL_FSB_data_activity             0x17
+#define EVENT_MASK_FSB_data_activity_DRDY_DRV   0x01
+#define EVENT_MASK_FSB_data_activity_DRDY_OWN   0x02
+#define EVENT_MASK_FSB_data_activity_DRDY_OOTHER 0x04
+#define EVENT_MASK_FSB_data_activity_DBSY_DRV   0x08
+#define EVENT_MASK_FSB_data_activity_DBSY_OWN   0x10
+#define EVENT_MASK_FSB_data_activity_DBSY_OOTHER 0x20
+/* Correctly spelt aliases; the "OOTHER" names are kept for existing users. */
+#define EVENT_MASK_FSB_data_activity_DRDY_OTHER EVENT_MASK_FSB_data_activity_DRDY_OOTHER
+#define EVENT_MASK_FSB_data_activity_DBSY_OTHER EVENT_MASK_FSB_data_activity_DBSY_OOTHER
+
+// Cache L2
+#define EVENT_SEL_BSQ_cache_reference           0x0c
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITS 0x001
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITE 0x002
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITM 0x004
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITS 0x008
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITE 0x010
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITM 0x020
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_MISS 0x100
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_MISS 0x200
+#define EVENT_MASK_BSQ_cache_reference_WR_L2_MISS 0x400
+
+#endif
+
+/* End of $RCSfile: p4perf.h,v $ */
-- 
2.30.2